Package Bio :: Package Restriction :: Module Restriction
[hide private]
[frames] | no frames]

Source Code for Module Bio.Restriction.Restriction

   1  #!/usr/bin/env python 
   2  # 
   3  #      Restriction Analysis Libraries. 
   4  #      Copyright (C) 2004. Frederic Sohm. 
   5  # 
   6  # This code is part of the Biopython distribution and governed by its 
   7  # license.  Please see the LICENSE file that should have been included 
   8  # as part of this package. 
   9  # 
  10   
  11  """Restriction Enzyme classes. 
  12   
  13  Notes about the various classes of the restriction enzyme implementation::
  14   
  15              RestrictionType is the type of all restriction enzymes. 
  16          ---------------------------------------------------------------------------- 
  17              AbstractCut implements some methods that are common to all enzymes. 
  18          ---------------------------------------------------------------------------- 
  19              NoCut, OneCut,TwoCuts   represent the number of double strand cuts 
  20                                      produced by the enzyme. 
  21                                      they correspond to the 4th field of the 
  22                                      rebase record emboss_e.NNN. 
  23                      0->NoCut    : the enzyme is not characterised. 
  24                      2->OneCut   : the enzyme produces one double strand cut.
  25                      4->TwoCuts  : two double strand cuts. 
  26          ---------------------------------------------------------------------------- 
  27              Meth_Dep, Meth_Undep    represent the methylation susceptibility to 
  28                                      the enzyme. 
  29                                      Not implemented yet. 
  30          ---------------------------------------------------------------------------- 
  31              Palindromic,            if the site is palindromic or not. 
  32              NonPalindromic          allows some optimisations of the code.
  33                                      No need to check the reverse strand 
  34                                      with palindromic sites. 
  35          ---------------------------------------------------------------------------- 
  36              Unknown, Blunt,         represent the overhang. 
  37              Ov5, Ov3                Unknown is here for symmetry reasons and 
  38                                      correspond to enzymes that are not 
  39                                      characterised in rebase. 
  40          ---------------------------------------------------------------------------- 
  41              Defined, Ambiguous,     represent the sequence of the overhang. 
  42              NotDefined 
  43                                      NotDefined is for enzymes not characterised 
  44                                      in rebase. 
  45   
  46                                      Defined correspond to enzymes that display 
  47                                      a constant overhang whatever the sequence. 
  48                                      ex : EcoRI. G^AATTC -> overhang :AATT 
  49                                                  CTTAA^G 
  50   
  51                                      Ambiguous : the overhang varies with the 
  52                                      sequence restricted. 
  53                                      Typically enzymes which cut outside their 
  54                                      restriction site or (but not always) 
  55                                      inside an ambiguous site. 
  56                                      ex: 
  57                                      AcuI CTGAAG(22/20)  -> overhang : NN 
  58                                      AasI GACNNN^NNNGTC  -> overhang : NN 
  59                                           CTGN^NNNNNCAG 
  60   
  61                  note : these 3 classes refer to the overhang, not the site.
  62                     So the enzyme ApoI (RAATTY) is defined even if its 
  63                     restriction site is ambiguous. 
  64   
  65                          ApoI R^AATTY -> overhang : AATT -> Defined 
  66                               YTTAA^R 
  67                     Accordingly, blunt enzymes are always Defined even 
  68                     when they cut outside their restriction site. 
  69          ---------------------------------------------------------------------------- 
  70              Not_available,          as found in rebase file emboss_r.NNN files. 
  71              Commercially_available 
  72                                      allow the selection of the enzymes 
  73                                      according to their suppliers to reduce the 
  74                                      quantity of results. 
  75                                      Also will allow the implementation of 
  76                                      buffer compatibility tables. Not 
  77                                      implemented yet. 
  78   
  79                                      the list of suppliers is extracted from 
  80                                      emboss_s.NNN 
  81          ---------------------------------------------------------------------------- 
  82  """ 
  83   
  84  from __future__ import print_function 
  85   
  86  import warnings 
  87   
  88  from Bio._py3k import zip 
  89  from Bio._py3k import filter 
  90  from Bio._py3k import range 
  91   
  92  import re 
  93  import itertools 
  94   
  95  from Bio.Seq import Seq, MutableSeq 
  96   
  97  from Bio.Restriction.Restriction_Dictionary import rest_dict as enzymedict 
  98  from Bio.Restriction.Restriction_Dictionary import typedict 
  99  from Bio.Restriction.Restriction_Dictionary import suppliers as suppliers_dict 
 100  from Bio.Restriction.RanaConfig import ConsoleWidth, NameWidth, Indent, MaxSize 
 101  from Bio.Restriction.RanaConfig import ftp_proxy, ftp_Rebase 
 102  from Bio.Restriction.RanaConfig import ftp_emb_e, ftp_emb_s, ftp_emb_r 
 103  from Bio.Restriction.PrintFormat import PrintFormat 
 104  from Bio import BiopythonWarning 
105 106 107 # Used to use Bio.Restriction.DNAUtils.check_bases (and expose it under this 108 # namespace), but have deprecated that module. 109 110 111 -def _check_bases(seq_string):
112 """Check characters in a string (PRIVATE). 113 114 Remove digits and white space present in string. Allows any valid ambiguous 115 IUPAC DNA single letters codes (ABCDGHKMNRSTVWY, lower case are converted). 116 117 Other characters (e.g. symbols) trigger a TypeError. 118 119 Returns the string WITH A LEADING SPACE (!). This is for backwards 120 compatibility, and may in part be explained by the fact that 121 Bio.Restriction doesn't use zero based counting. 122 """ 123 # Remove white space and make upper case: 124 seq_string = "".join(seq_string.split()).upper() 125 # Remove digits 126 for c in "0123456789": 127 seq_string = seq_string.replace(c, "") 128 # Check only allowed IUPAC letters 129 if not set(seq_string).issubset(set("ABCDGHKMNRSTVWY")): 130 raise TypeError("Invalid character found in %s" % repr(seq_string)) 131 return " " + seq_string
# Lookup table keyed by single-letter IUPAC nucleotide code.  Each value is
# the string of codes recorded as matching the key.  The entries appear to
# list every code whose set of bases overlaps the key's (e.g. 'A' pairs with
# R, W, M, H, V, D and N, all of which can stand for A).
matching = {'A': 'ARWMHVDN', 'C': 'CYSMHBVN', 'G': 'GRSKBVDN',
            'T': 'TYWKHBDN', 'R': 'ABDGHKMNSRWV', 'Y': 'CBDHKMNSTWVY',
            'W': 'ABDHKMNRTWVY', 'S': 'CBDGHKMNSRVY', 'M': 'ACBDHMNSRWVY',
            'K': 'BDGHKNSRTWVY', 'H': 'ACBDHKMNSRTWVY',
            'B': 'CBDGHKMNSRTWVY', 'V': 'ACBDGHKMNSRWVY',
            'D': 'ABDGHKMNSRTWVY', 'N': 'ACBDGHKMNSRTWVY'}

# Alias: the Seq class is also reachable under the name DNA.
DNA = Seq
class FormattedSeq(object):
    """Sequence wrapper used by the restriction analysis code.

    FormattedSeq(seq, [linear=True]) -> new FormattedSeq.

    The wrapped text is produced by ``_check_bases``, so it carries one
    leading space: the first base sits at index 1 rather than 0 (a
    biological index instead of a python index).

    The instance also remembers the shape of the molecule, linear (the
    default) or circular.  For a circular molecule ``finditer`` also
    searches across the junction between the end and the start.
    """

    def __init__(self, seq, linear=True):
        """Build a FormattedSeq from a Seq, MutableSeq or FormattedSeq.

        When ``seq`` is already a FormattedSeq its whole state, shape
        included, is copied and ``linear`` has no effect.  Any other type
        raises TypeError.
        """
        if isinstance(seq, (Seq, MutableSeq)):
            text = str(seq)
            self.lower = text.islower()
            # _check_bases hands the text back with a leading space (!)
            self.data = _check_bases(text)
            self.linear = linear
            self.klass = seq.__class__
            self.alphabet = seq.alphabet
            return
        if isinstance(seq, FormattedSeq):
            self.lower = seq.lower
            self.data = seq.data
            self.linear = seq.linear
            self.alphabet = seq.alphabet
            self.klass = seq.klass
            return
        raise TypeError('expected Seq or MutableSeq, got %s' % type(seq))

    def __len__(self):
        """Return the number of bases (the leading space is not counted)."""
        return len(self.data) - 1

    def __repr__(self):
        """Return a string showing the bases and the shape flag."""
        bases = repr(self[1:])
        shape = repr(self.linear)
        return 'FormattedSeq(%s, linear=%s)' % (bases, shape)

    def __eq__(self, other):
        """Two FormattedSeq are equal when their repr() strings agree."""
        return isinstance(other, FormattedSeq) and repr(self) == repr(other)

    def circularise(self):
        """FS.circularise() -> mark FS as circular (in place)."""
        self.linear = False

    def linearise(self):
        """FS.linearise() -> mark FS as linear (in place)."""
        self.linear = True

    def to_linear(self):
        """FS.to_linear() -> new linear FS instance."""
        clone = self.__class__(self)
        clone.linear = True
        return clone

    def to_circular(self):
        """FS.to_circular() -> new circular FS instance."""
        clone = self.__class__(self)
        clone.linear = False
        return clone

    def is_linear(self):
        """FS.is_linear() -> bool.

        True if the sequence will be analysed as a linear sequence.
        """
        return self.linear

    def finditer(self, pattern, size):
        """FS.finditer(pattern, size) -> list.

        Return a list of ``(location, match.group)`` tuples, one per match
        of ``pattern`` in the sequence.  The bound ``group`` method is what
        non palindromic enzymes use to tell the strands apart.

        pattern is the regular expression for the enzyme recognition site.
        size is the length of that recognition site; for a circular
        sequence the first ``size - 1`` bases are appended to the text so
        that sites spanning the junction are found.
        """
        haystack = self.data
        if not self.is_linear():
            haystack = haystack + self.data[1:size]
        return [(hit.start(), hit.group)
                for hit in re.finditer(pattern, haystack)]

    def __getitem__(self, i):
        """Return item/slice ``i`` wrapped in the original sequence class.

        The case of the original sequence is restored when it was entirely
        lower case.
        """
        chunk = self.data[i]
        if self.lower:
            chunk = chunk.lower()
        return self.klass(chunk, self.alphabet)
class RestrictionType(type):
    """RestrictionType. Type from which all enzyme classes derive.

    Implements the operator methods (``+``, ``/``, ``//``, ``%``, ``>>``
    and the comparisons) that can be applied to the enzyme classes.
    """

    def __init__(cls, name='', bases=(), dct=None):
        """RE(name, bases, dct) -> RestrictionType instance.

        Not intended to be used in normal operation; the enzymes are
        instantiated when the module is imported.

        Rejects names containing a hyphen and replaces ``cls.compsite``
        by its compiled regular expression.  Both failures are reported
        as ValueError.
        """
        if "-" in name:
            raise ValueError("Problem with hyphen in %s as enzyme name"
                             % repr(name))
        # 2011/11/26 - a call to super().__init__(cls, name, bases, dct)
        # used to sit here.  Nobody knew what it was meant to accomplish
        # and all unit tests seem to pass without it.
        try:
            compiled = re.compile(cls.compsite)
        except Exception:
            raise ValueError("Problem with regular expression, re.compiled(%s)"
                             % repr(cls.compsite))
        cls.compsite = compiled

    def __add__(cls, other):
        """RE.__add__(other) -> RestrictionBatch().

        With another enzyme, return a batch holding both enzymes.
        With a RestrictionBatch, add this enzyme to it.
        Anything else raises TypeError.
        """
        if isinstance(other, RestrictionType):
            return RestrictionBatch([cls, other])
        if isinstance(other, RestrictionBatch):
            return other.add_nocheck(cls)
        raise TypeError

    def __div__(cls, other):
        """RE.__div__(other) -> list.

        RE/other
        returns RE.search(other).
        """
        return cls.search(other)

    def __rdiv__(cls, other):
        """RE.__rdiv__(other) -> list.

        other/RE
        returns RE.search(other).
        """
        return cls.search(other)

    def __truediv__(cls, other):
        """RE.__truediv__(other) -> list.

        RE/other
        returns RE.search(other).
        """
        return cls.search(other)

    def __rtruediv__(cls, other):
        """RE.__rtruediv__(other) -> list.

        other/RE
        returns RE.search(other).
        """
        return cls.search(other)

    def __floordiv__(cls, other):
        """RE.__floordiv__(other) -> list.

        RE//other
        returns RE.catalyse(other).
        """
        return cls.catalyse(other)

    def __rfloordiv__(cls, other):
        """RE.__rfloordiv__(other) -> list.

        other//RE
        returns RE.catalyse(other).
        """
        return cls.catalyse(other)

    def __str__(cls):
        """RE.__str__() -> str.

        Return the name of the enzyme.
        """
        return cls.__name__

    def __repr__(cls):
        """RE.__repr__() -> str.

        The enzyme name; used with eval or exec it designates the enzyme.
        """
        return "%s" % cls.__name__

    def __len__(cls):
        """RE.__len__() -> int.

        Length of the recognition site.
        """
        return cls.size

    def __hash__(cls):
        """Hash on identity, in line with the identity based __eq__."""
        return id(cls)

    def __eq__(cls, other):
        """RE == other -> bool.

        True only if RE and other are the very same Python object.
        """
        return cls is other

    def __ne__(cls, other):
        """RE != other -> bool.

        False for strict isoschizomers (same recognition site, same
        restriction, i.e. equal ``charac``); True in all other cases,
        including when other is not an enzyme.

        WARNING - This is not the inverse of the __eq__ method.
        """
        same_cut = (isinstance(other, RestrictionType) and
                    cls.charac == other.charac)
        return not same_cut

    def __rshift__(cls, other):
        """RE >> other -> bool.

        neoschizomer : same recognition site, different restriction -> True
        all the others -> False
        """
        return bool(isinstance(other, RestrictionType) and
                    cls.site == other.site and
                    cls.charac != other.charac)

    def __mod__(cls, other):
        """a % b -> bool.

        Test the compatibility of the overhangs of a and b.
        True if a and b have compatible overhangs.
        Raises TypeError when b is not an enzyme.
        """
        if isinstance(other, RestrictionType):
            return cls._mod1(other)
        raise TypeError(
            'expected RestrictionType, got %s instead' % type(other))

    # The four ordering operators share one rule: compare the size of the
    # recognition site first, then the enzyme names alphabetically.  A
    # (size, name) tuple comparison expresses exactly that.

    def __ge__(cls, other):
        """a >= b -> bool.

        sorting order:
            1. size of the recognition site.
            2. if equal size, alphabetical order of the names.
        """
        if not isinstance(other, RestrictionType):
            raise NotImplementedError
        return (len(cls), cls.__name__) >= (len(other), other.__name__)

    def __gt__(cls, other):
        """a > b -> bool.

        sorting order:
            1. size of the recognition site.
            2. if equal size, alphabetical order of the names.
        """
        if not isinstance(other, RestrictionType):
            raise NotImplementedError
        return (len(cls), cls.__name__) > (len(other), other.__name__)

    def __le__(cls, other):
        """a <= b -> bool.

        sorting order:
            1. size of the recognition site.
            2. if equal size, alphabetical order of the names.
        """
        if not isinstance(other, RestrictionType):
            raise NotImplementedError
        return (len(cls), cls.__name__) <= (len(other), other.__name__)

    def __lt__(cls, other):
        """a < b -> bool.

        sorting order:
            1. size of the recognition site.
            2. if equal size, alphabetical order of the names.
        """
        if not isinstance(other, RestrictionType):
            raise NotImplementedError
        return (len(cls), cls.__name__) < (len(other), other.__name__)
class AbstractCut(RestrictionType):
    """Implement the methods that are common to all restriction enzymes.

    All the methods are classmethods.

    For internal use only. Not meant to be instantiated.
    """

    @classmethod
    def search(cls, dna, linear=True):
        """RE.search(dna, linear=True) -> list.

        Return a list of all the sites of RE in dna, compensating for
        circular sequences and so on.

        dna must be a Bio.Seq.Seq instance, a Bio.Seq.MutableSeq instance
        or an already prepared FormattedSeq (in which case ``linear`` is
        not used).

        If linear is False, the restriction sites that span over the
        boundaries will be included.

        The positions are the first base of the 3' fragment,
        i.e. the first base after the position the enzyme will cut.
        """
        #
        # Keeping search apart from _search allows a (very limited)
        # optimisation when a batch of enzymes is used: the DNA is then
        # formatted once by the class implementing the batch instead of
        # once per enzyme.  See RestrictionBatch.search() for example.
        #
        # The sequence is stored on the class itself; _search reads it
        # back from cls.dna.
        if isinstance(dna, FormattedSeq):
            cls.dna = dna
        else:
            cls.dna = FormattedSeq(dna, linear)
        return cls._search()

    @classmethod
    def all_suppliers(cls):
        """RE.all_suppliers() -> print all the suppliers, sorted by name."""
        supply = sorted(x[0] for x in suppliers_dict.values())
        print(",\n".join(supply))

    @classmethod
    def is_equischizomer(cls, other):
        """RE.is_equischizomer(other) -> bool.

        True if other is an equischizomer of RE.
        False else.

        equischizomer <=> same site, same position of restriction.
        """
        return not cls != other

    @classmethod
    def is_neoschizomer(cls, other):
        """RE.is_neoschizomer(other) -> bool.

        True if other is a neoschizomer of RE.
        False else.

        neoschizomer <=> same site, different position of restriction.
        """
        return cls >> other

    @classmethod
    def is_isoschizomer(cls, other):
        """RE.is_isoschizomer(other) -> bool.

        True if other is an isoschizomer of RE.
        False else.

        isoschizomer <=> same site.
        """
        return (not cls != other) or cls >> other

    @classmethod
    def equischizomers(cls, batch=None):
        """RE.equischizomers([batch]) -> list.

        Return a sorted list of all the equischizomers of RE, RE itself
        excluded.  If batch is supplied it is used instead of the default
        AllEnzymes (an empty batch also falls back to AllEnzymes).

        equischizomer <=> same site, same position of restriction.
        """
        if not batch:
            batch = AllEnzymes
        r = [x for x in batch if not cls != x]
        # RE always matches itself, but a caller-supplied batch need not
        # contain RE: only drop it when it is actually there instead of
        # failing with ValueError.
        try:
            r.remove(cls)
        except ValueError:
            pass
        r.sort()
        return r

    @classmethod
    def neoschizomers(cls, batch=None):
        """RE.neoschizomers([batch]) -> list.

        Return a sorted list of all the neoschizomers of RE.
        If batch is supplied it is used instead of the default AllEnzymes
        (an empty batch also falls back to AllEnzymes).

        neoschizomer <=> same site, different position of restriction.
        """
        if not batch:
            batch = AllEnzymes
        return sorted(x for x in batch if cls >> x)

    @classmethod
    def isoschizomers(cls, batch=None):
        """RE.isoschizomers([batch]) -> list.

        Return a sorted list of all the equischizomers and neoschizomers
        of RE, RE itself excluded.  If batch is supplied it is used
        instead of the default AllEnzymes (an empty batch also falls back
        to AllEnzymes).
        """
        if not batch:
            batch = AllEnzymes
        r = [x for x in batch if (cls >> x) or (not cls != x)]
        # As in equischizomers(): RE may be missing from a caller-supplied
        # batch, so its removal must not be allowed to raise.
        try:
            r.remove(cls)
        except ValueError:
            pass
        r.sort()
        return r

    @classmethod
    def frequency(cls):
        """RE.frequency() -> int.

        Frequency of the site.
        """
        return cls.freq
class NoCut(AbstractCut):
    """Implement the methods specific to the enzymes that do not cut.

    These are generally enzymes that have been only partially
    characterised, so the way they cut the DNA is unknown, or enzymes
    whose pattern of cut is too complex to be recorded in Rebase
    (ncuts values of 0 in emboss_e.###).

    When using search() with these enzymes the values returned are at
    the start of the restriction site.

    Unknown and NotDefined are also part of the base classes of these
    enzymes, so their catalyse() method raises NotImplementedError.

    Internal use only. Not meant to be instantiated.
    """

    @classmethod
    def cut_once(cls):
        """RE.cut_once() -> bool.

        True if the enzyme cuts the sequence one time on each strand.
        Always False here.
        """
        return False

    @classmethod
    def cut_twice(cls):
        """RE.cut_twice() -> bool.

        True if the enzyme cuts the sequence twice on each strand.
        Always False here.
        """
        return False

    @classmethod
    def _modify(cls, location):
        """RE._modify(location) -> generator of int.

        For internal use only.

        location is the integer position of a match of the enzyme pattern
        in the sequence.  No cut position is recorded for these enzymes,
        so the location itself is yielded unchanged.
        """
        yield location

    @classmethod
    def _rev_modify(cls, location):
        """RE._rev_modify(location) -> generator of int.

        For internal use only.

        Same as _modify, for a site situated on the antiparallel strand
        when the enzyme is not palindromic.
        """
        yield location

    @classmethod
    def characteristic(cls):
        """RE.characteristic() -> tuple.

        The tuple contains the attributes:
            fst5 -> first 5' cut (current strand) or None
            fst3 -> first 3' cut (complementary strand) or None
            scd5 -> second 5' cut (current strand) or None
            scd3 -> second 3' cut (complementary strand) or None
            site -> recognition site.

        The four cut positions are always None here.
        """
        return None, None, None, None, cls.site
class OneCut(AbstractCut):
    """Implement the methods specific to the enzymes that cut the DNA once.

    Corresponds to ncuts values of 2 in emboss_e.###

    Internal use only. Not meant to be instantiated.
    """

    @classmethod
    def cut_once(cls):
        """RE.cut_once() -> bool.

        True if the enzyme cuts the sequence one time on each strand.
        Always True here.
        """
        return True

    @classmethod
    def cut_twice(cls):
        """RE.cut_twice() -> bool.

        True if the enzyme cuts the sequence twice on each strand.
        Always False here.
        """
        return False

    @classmethod
    def _modify(cls, location):
        """RE._modify(location) -> generator of int.

        For internal use only.

        location is the integer position of a match of the enzyme pattern
        in the sequence; the value yielded is the place where the enzyme
        will really cut, i.e. ``location + fst5``.

        example::

            EcoRI pattern : GAATTC
            EcoRI will cut after the G.
            so in the sequence:
                     ______
            GAATACACGGAATTCGA
                     |
                     10
            dna.finditer(GAATTC, 6) will return 10 as G is the 10th base
            EcoRI cut after the G so:
            EcoRI._modify(10) -> 11.
        """
        yield location + cls.fst5

    @classmethod
    def _rev_modify(cls, location):
        """RE._rev_modify(location) -> generator of int.

        For internal use only.

        Same as _modify, for a site situated on the antiparallel strand
        when the enzyme is not palindromic: yields ``location - fst3``.
        """
        yield location - cls.fst3

    @classmethod
    def characteristic(cls):
        """RE.characteristic() -> tuple.

        The tuple contains the attributes:
            fst5 -> first 5' cut (current strand) or None
            fst3 -> first 3' cut (complementary strand) or None
            scd5 -> second 5' cut (current strand) or None
            scd3 -> second 3' cut (complementary strand) or None
            site -> recognition site.

        The two second-cut positions are always None here.
        """
        return cls.fst5, cls.fst3, None, None, cls.site
class TwoCuts(AbstractCut):
    """Implement the methods specific to the enzymes that cut the DNA twice.

    Corresponds to ncuts values of 4 in emboss_e.###

    Internal use only. Not meant to be instantiated.
    """

    @classmethod
    def cut_once(cls):
        """RE.cut_once() -> bool.

        True if the enzyme cuts the sequence one time on each strand.
        Always False here.
        """
        return False

    @classmethod
    def cut_twice(cls):
        """RE.cut_twice() -> bool.

        True if the enzyme cuts the sequence twice on each strand.
        Always True here.
        """
        return True

    @classmethod
    def _modify(cls, location):
        """RE._modify(location) -> generator of int.

        For internal use only.

        location is the integer position of a match of the enzyme pattern
        in the sequence.  Two values are yielded, one per cutting site:
        ``location + fst5`` then ``location + scd5``.

        example (single cut, for the principle)::

            EcoRI pattern : GAATTC
            EcoRI will cut after the G.
            so in the sequence:
                     ______
            GAATACACGGAATTCGA
                     |
                     10
            dna.finditer(GAATTC, 6) will return 10 as G is the 10th base
            EcoRI cut after the G so:
            EcoRI._modify(10) -> 11.
        """
        yield location + cls.fst5
        yield location + cls.scd5

    @classmethod
    def _rev_modify(cls, location):
        """RE._rev_modify(location) -> generator of int.

        For internal use only.

        Same as _modify, for a site situated on the antiparallel strand
        when the enzyme is not palindromic: yields ``location - fst3``
        then ``location - scd3``.
        """
        yield location - cls.fst3
        yield location - cls.scd3

    @classmethod
    def characteristic(cls):
        """RE.characteristic() -> tuple.

        The tuple contains the attributes:
            fst5 -> first 5' cut (current strand) or None
            fst3 -> first 3' cut (complementary strand) or None
            scd5 -> second 5' cut (current strand) or None
            scd3 -> second 3' cut (complementary strand) or None
            site -> recognition site.
        """
        return cls.fst5, cls.fst3, cls.scd5, cls.scd3, cls.site
class Meth_Dep(AbstractCut):
    """Implement the information about methylation.

    Enzymes of this class possess a site which is methylable.
    """

    @classmethod
    def is_methylable(cls):
        """RE.is_methylable() -> bool.

        True if the recognition site is methylable.
        Always True for this class.
        """
        return True
class Meth_Undep(AbstractCut):
    """Implement the information about methylation sensitivity.

    Enzymes of this class are not sensitive to methylation.
    """

    @classmethod
    def is_methylable(cls):
        """RE.is_methylable() -> bool.

        True if the recognition site is methylable.
        Always False for this class.
        """
        return False
class Palindromic(AbstractCut):
    """Implement the methods specific to the enzymes which are palindromic.

    palindromic means : the recognition site and its reverse complement
    are identical.
    Remark : an enzyme with a site CGNNCG is palindromic even if some of
    the sites that it will recognise are not, for example CGAACG.

    Internal use only. Not meant to be instantiated.
    """

    @classmethod
    def _search(cls):
        """RE._search() -> list.

        For internal use only.

        Search implementation for palindromic enzymes: every match found
        in ``cls.dna`` is converted into its cut position(s) by _modify.
        The list is stored in ``cls.results`` and, when not empty,
        post-processed by ``cls._drop()`` before being returned.
        """
        cuts = []
        for start, _group in cls.dna.finditer(cls.compsite, cls.size):
            cuts.extend(cls._modify(start))
        cls.results = cuts
        if cuts:
            cls._drop()
        return cls.results

    @classmethod
    def is_palindromic(cls):
        """RE.is_palindromic() -> bool.

        True if the recognition site is a palindrome.
        Always True for this class.
        """
        return True
class NonPalindromic(AbstractCut):
    """Implement the methods specific to the enzymes which are not palindromic.

    palindromic means : the recognition site and its reverse complement
    are identical.

    Internal use only. Not meant to be instantiated.
    """

    @classmethod
    def _search(cls):
        """RE._search() -> list.

        For internal use only.

        Search implementation for non palindromic enzymes.  A match whose
        group named after the enzyme is non-empty is converted with
        _modify; any other match is converted with _rev_modify and also
        recorded in ``cls.on_minus``.  The merged, sorted cut positions
        are stored in ``cls.results`` and, when not empty, post-processed
        by ``cls._drop()`` before being returned.
        """
        matches = cls.dna.finditer(cls.compsite, cls.size)
        forward = cls.results = []
        to_cut = cls._modify
        to_rev_cut = cls._rev_modify
        enzyme_name = str(cls)
        reverse = cls.on_minus = []
        for start, group in matches:
            if group(enzyme_name):
                forward.extend(to_cut(start))
            else:
                reverse.extend(to_rev_cut(start))
        # cls.results is the same list object as `forward`.
        forward.extend(reverse)
        if forward:
            forward.sort()
            cls._drop()
        return cls.results

    @classmethod
    def is_palindromic(cls):
        """RE.is_palindromic() -> bool.

        True if the recognition site is a palindrome.
        Always False for this class.
        """
        return False
class Unknown(AbstractCut):
    """Implement the methods specific to the enzymes with unknown overhang.

    These enzymes are also NotDefined and NoCut.

    Internal use only. Not meant to be instantiated.
    """

    @classmethod
    def catalyse(cls, dna, linear=True):
        """RE.catalyse(dna, linear=True) -> tuple of DNA.
        RE.catalyze(dna, linear=True) -> tuple of DNA.

        For the other overhang classes this returns the fragments
        produced by restricting dna with RE.  The way these enzymes cut
        is not known, so this implementation always raises
        NotImplementedError.

        dna must be a Bio.Seq.Seq instance or a Bio.Seq.MutableSeq
        instance; linear tells whether the sequence is linear or circular.
        Neither argument is used here.
        """
        raise NotImplementedError('%s restriction is unknown.'
                                  % cls.__name__)
    catalyze = catalyse

    @classmethod
    def is_blunt(cls):
        """RE.is_blunt() -> bool.

        True if the enzyme produces blunt ends.  Always False here.

        see also:
            RE.is_3overhang()
            RE.is_5overhang()
            RE.is_unknown()
        """
        return False

    @classmethod
    def is_5overhang(cls):
        """RE.is_5overhang() -> bool.

        True if the enzyme produces 5' overhang sticky ends.
        Always False here.

        see also:
            RE.is_3overhang()
            RE.is_blunt()
            RE.is_unknown()
        """
        return False

    @classmethod
    def is_3overhang(cls):
        """RE.is_3overhang() -> bool.

        True if the enzyme produces 3' overhang sticky ends.
        Always False here.

        see also:
            RE.is_5overhang()
            RE.is_blunt()
            RE.is_unknown()
        """
        return False

    @classmethod
    def overhang(cls):
        """RE.overhang() -> str. Type of overhang of the enzyme.

        Can be "3' overhang", "5' overhang", "blunt", "unknown";
        always 'unknown' here.
        """
        return 'unknown'

    @classmethod
    def compatible_end(cls, batch=None):
        """RE.compatible_end([batch]) -> list.

        List of all the enzymes that share compatible ends with RE.
        Always empty for enzymes whose overhang is unknown.

        batch is accepted, and ignored, so that the call signature agrees
        with Blunt.compatible_end and callers can pass a batch without
        checking the overhang type of the enzyme first.
        """
        return []

    @classmethod
    def _mod1(cls, other):
        """RE._mod1(other) -> bool.

        For internal use only.

        Test for the compatibility of the restriction ends of RE and
        other.  Always False here.
        """
        return False
class Blunt(AbstractCut):
    """Implement the methods specific to the enzymes with a blunt overhang.

    The enzyme cuts the + strand and the - strand of the DNA at the same
    place.

    Internal use only. Not meant to be instantiated.
    """

    @classmethod
    def catalyse(cls, dna, linear=True):
        """RE.catalyse(dna, linear=True) -> tuple of DNA.
        RE.catalyze(dna, linear=True) -> tuple of DNA.

        Return a tuple of dna as will be produced by using RE to restrict
        the dna.  Without any site the tuple holds the whole sequence.

        dna must be a Bio.Seq.Seq instance or a Bio.Seq.MutableSeq
        instance.

        If linear is False, the sequence is considered to be circular and
        the output will be modified accordingly.
        """
        cuts = cls.search(dna, linear)
        # search() stored the formatted sequence on the class.
        seq = cls.dna
        if not cuts:
            return seq[1:],
        # Fragments delimited by two consecutive sites (none when there
        # is a single site).
        inner = [seq[begin:end] for begin, end in zip(cuts, cuts[1:])]
        if seq.is_linear():
            # START of the sequence to FIRST site, the inner fragments,
            # then LAST site to END of the sequence.
            fragments = [seq[1:cuts[0]]] + inner + [seq[cuts[-1]:]]
        else:
            # circular : bridge LAST site to FIRST site, then the others.
            fragments = [seq[cuts[-1]:] + seq[1:cuts[0]]] + inner
        return tuple(fragments)
    catalyze = catalyse

    @classmethod
    def is_blunt(cls):
        """RE.is_blunt() -> bool.

        True if the enzyme produces blunt ends.  Always True here.

        see also:
            RE.is_3overhang()
            RE.is_5overhang()
            RE.is_unknown()
        """
        return True

    @classmethod
    def is_5overhang(cls):
        """RE.is_5overhang() -> bool.

        True if the enzyme produces 5' overhang sticky ends.
        Always False here.

        see also:
            RE.is_3overhang()
            RE.is_blunt()
            RE.is_unknown()
        """
        return False

    @classmethod
    def is_3overhang(cls):
        """RE.is_3overhang() -> bool.

        True if the enzyme produces 3' overhang sticky ends.
        Always False here.

        see also:
            RE.is_5overhang()
            RE.is_blunt()
            RE.is_unknown()
        """
        return False

    @classmethod
    def overhang(cls):
        """RE.overhang() -> str. Type of overhang of the enzyme.

        Can be "3' overhang", "5' overhang", "blunt", "unknown";
        always 'blunt' here.
        """
        return 'blunt'

    @classmethod
    def compatible_end(cls, batch=None):
        """RE.compatible_end([batch]) -> list.

        Sorted list of all the enzymes that share compatible ends with
        RE, i.e. every blunt enzyme of the batch.  If batch is supplied
        it is used instead of the default AllEnzymes (an empty batch also
        falls back to AllEnzymes).
        """
        if not batch:
            batch = AllEnzymes
        # Filter the batch that was asked for; iterating AllEnzymes here
        # would silently ignore the caller's batch.
        return sorted(x for x in batch if x.is_blunt())

    @staticmethod
    def _mod1(other):
        """RE._mod1(other) -> bool.

        For internal use only.

        Test for the compatibility of the restriction ends of RE and
        other: a blunt end is compatible with any other Blunt enzyme.
        """
        return issubclass(other, Blunt)
class Ov5(AbstractCut):
    """Implement the methods specific to the enzymes for which the overhang
    is recessed in 3'.

    The enzyme cuts the + strand after the - strand of the DNA.

    Internal use only. Not meant to be instantiated.
    """

    @classmethod
    def catalyse(cls, dna, linear=True):
        """RE.catalyse(dna, linear=True) -> tuple of DNA.
        RE.catalyze(dna, linear=True) -> tuple of DNA.

        return a tuple of dna as will be produced by using RE to restrict the
        dna.

        dna must be a Bio.Seq.Seq instance or a Bio.Seq.MutableSeq instance.

        if linear is False, the sequence is considered to be circular and the
        output will be modified accordingly.
        """
        sites = cls.search(dna, linear)
        # The fragments are sliced out of cls.dna, not out of the dna
        # argument (presumably cls.search() stores the formatted sequence
        # there -- confirm against search()).
        seq = cls.dna
        if not sites:
            # no site : the whole sequence is the only fragment.
            return seq[1:],
        # indexes of the sites which are followed by another site.
        between = range(len(sites) - 1)
        if seq.is_linear():
            # START of the sequence to FIRST site.
            fragments = [seq[1:sites[0]]]
            # one fragment for each pair of consecutive sites.
            fragments += [seq[sites[x]:sites[x + 1]] for x in between]
            # LAST site to END of the sequence.
            fragments.append(seq[sites[-1]:])
        else:
            # circular : bridge LAST site to FIRST site.
            fragments = [seq[sites[-1]:] + seq[1:sites[0]]]
            # add the others (nothing to add when there is only one site).
            fragments += [seq[sites[x]:sites[x + 1]] for x in between]
        return tuple(fragments)
    catalyze = catalyse

    @classmethod
    def is_blunt(cls):
        """RE.is_blunt() -> bool.

        True if the enzyme produces blunt end.

        see also:
            RE.is_3overhang()
            RE.is_5overhang()
            RE.is_unknown()
        """
        return False

    @classmethod
    def is_5overhang(cls):
        """RE.is_5overhang() -> bool.

        True if the enzyme produces 5' overhang sticky end.

        see also:
            RE.is_3overhang()
            RE.is_blunt()
            RE.is_unknown()
        """
        return True

    @classmethod
    def is_3overhang(cls):
        """RE.is_3overhang() -> bool.

        True if the enzyme produces 3' overhang sticky end.

        see also:
            RE.is_5overhang()
            RE.is_blunt()
            RE.is_unknown()
        """
        return False

    @classmethod
    def overhang(cls):
        """RE.overhang() -> str. type of overhang of the enzyme.

        can be "3' overhang", "5' overhang", "blunt", "unknown"
        """
        return "5' overhang"

    @classmethod
    def compatible_end(cls, batch=None):
        """RE.compatible_end([batch]) -> list.

        sorted list of all the enzymes of batch that share compatible end
        with RE, i.e. the enzymes x for which x.is_5overhang() is True and
        x % cls is true.
        If batch is not given (or is empty) AllEnzymes is used.
        """
        if not batch:
            batch = AllEnzymes
        # Search the batch that was asked for, not always AllEnzymes.
        return sorted(x for x in iter(batch)
                      if x.is_5overhang() and x % cls)

    @classmethod
    def _mod1(cls, other):
        """RE._mod1(other) -> bool.

        for internal use only

        test for the compatibility of restriction ending of RE and other.
        other must be a subclass of Ov5, the comparison of the overhangs
        themselves is delegated to cls._mod2(other).
        """
        if issubclass(other, Ov5):
            return cls._mod2(other)
        else:
            return False
1286
class Ov3(AbstractCut):
    """Implement the methods specific to the enzymes for which the overhang
    is recessed in 5'.

    The enzyme cuts the - strand after the + strand of the DNA.

    Internal use only. Not meant to be instantiated.
    """

    @classmethod
    def catalyse(cls, dna, linear=True):
        """RE.catalyse(dna, linear=True) -> tuple of DNA.
        RE.catalyze(dna, linear=True) -> tuple of DNA.

        return a tuple of dna as will be produced by using RE to restrict the
        dna.

        dna must be a Bio.Seq.Seq instance or a Bio.Seq.MutableSeq instance.

        if linear is False, the sequence is considered to be circular and the
        output will be modified accordingly.
        """
        sites = cls.search(dna, linear)
        # The fragments are sliced out of cls.dna, not out of the dna
        # argument (presumably cls.search() stores the formatted sequence
        # there -- confirm against search()).
        seq = cls.dna
        if not sites:
            # no site : the whole sequence is the only fragment.
            return seq[1:],
        # indexes of the sites which are followed by another site.
        between = range(len(sites) - 1)
        if seq.is_linear():
            # START of the sequence to FIRST site.
            fragments = [seq[1:sites[0]]]
            # one fragment for each pair of consecutive sites.
            fragments += [seq[sites[x]:sites[x + 1]] for x in between]
            # LAST site to END of the sequence.
            fragments.append(seq[sites[-1]:])
        else:
            # circular : bridge LAST site to FIRST site.
            fragments = [seq[sites[-1]:] + seq[1:sites[0]]]
            # add the others (nothing to add when there is only one site).
            fragments += [seq[sites[x]:sites[x + 1]] for x in between]
        return tuple(fragments)
    catalyze = catalyse

    @classmethod
    def is_blunt(cls):
        """RE.is_blunt() -> bool.

        True if the enzyme produces blunt end.

        see also:
            RE.is_3overhang()
            RE.is_5overhang()
            RE.is_unknown()
        """
        return False

    @classmethod
    def is_5overhang(cls):
        """RE.is_5overhang() -> bool.

        True if the enzyme produces 5' overhang sticky end.

        see also:
            RE.is_3overhang()
            RE.is_blunt()
            RE.is_unknown()
        """
        return False

    @classmethod
    def is_3overhang(cls):
        """RE.is_3overhang() -> bool.

        True if the enzyme produces 3' overhang sticky end.

        see also:
            RE.is_5overhang()
            RE.is_blunt()
            RE.is_unknown()
        """
        return True

    @classmethod
    def overhang(cls):
        """RE.overhang() -> str. type of overhang of the enzyme.

        can be "3' overhang", "5' overhang", "blunt", "unknown"
        """
        return "3' overhang"

    @classmethod
    def compatible_end(cls, batch=None):
        """RE.compatible_end([batch]) -> list.

        sorted list of all the enzymes of batch that share compatible end
        with RE, i.e. the enzymes x for which x.is_3overhang() is True and
        x % cls is true.
        If batch is not given (or is empty) AllEnzymes is used.
        """
        if not batch:
            batch = AllEnzymes
        # Search the batch that was asked for, not always AllEnzymes.
        return sorted(x for x in iter(batch)
                      if x.is_3overhang() and x % cls)

    @classmethod
    def _mod1(cls, other):
        """RE._mod1(other) -> bool.

        for internal use only

        test for the compatibility of restriction ending of RE and other.
        other must be a subclass of Ov3, the comparison of the overhangs
        themselves is delegated to cls._mod2(other).
        """
        if issubclass(other, Ov3):
            return cls._mod2(other)
        else:
            return False
1421
class Defined(AbstractCut):
    """Methods shared by the enzymes whose cut and overhang never vary.

    Typical example : EcoRI -> G^AATT_C
                      The overhang will always be AATT
    Notes:
        Blunt enzymes are always defined, even if their site is
        GGATCCNNN^_N : their overhang is always the same, blunt!

    Internal use only. Not meant to be instantiated.
    """

    @classmethod
    def _drop(cls):
        """RE._drop() -> None, cls.results is updated.

        for internal use only.

        Linear sequence : drop the sites situated outside the sequence.
        Circular sequence : bring the index of such sites back inside the
        sequence instead of dropping them.
        """
        #
        # The positions stored in cls.results may lie outside the sequence
        # because the distance from the site to the cut has been added to
        # them (by the _modify and _rev_modify methods).
        # On a circular sequence the site itself is in the sequence, so the
        # position is wrapped around rather than removed.
        #
        size = len(cls.dna)
        if cls.dna.is_linear():
            # skip the leading positions < 1 then stop at the first
            # position which is not < size.
            inside = itertools.dropwhile(lambda pos: pos < 1, cls.results)
            cls.results = list(
                itertools.takewhile(lambda pos: pos < size, inside))
            return
        sites = cls.results
        # leading positions before the start of the sequence.
        for rank, pos in enumerate(sites):
            if pos >= 1:
                break
            sites[rank] += size
        # trailing positions after the end, walked from the last one.
        for back, pos in enumerate(sites[::-1], 1):
            if pos <= size:
                break
            sites[-back] -= size
        return

    @classmethod
    def is_defined(cls):
        """RE.is_defined() -> bool.

        True if the sequence recognised and cut is constant,
        i.e. the recognition site is not degenerated AND the enzyme cut inside
        the site.  Always True here.

        see also:
            RE.is_ambiguous()
            RE.is_unknown()
        """
        return True

    @classmethod
    def is_ambiguous(cls):
        """RE.is_ambiguous() -> bool.

        True if the sequence recognised and cut is ambiguous,
        i.e. the recognition site is degenerated AND/OR the enzyme cut outside
        the site.  Always False here.

        see also:
            RE.is_defined()
            RE.is_unknown()
        """
        return False

    @classmethod
    def is_unknown(cls):
        """RE.is_unknown() -> bool.

        True if the sequence is unknown,
        i.e. the recognition site has not been characterised yet.
        Always False here.

        see also:
            RE.is_defined()
            RE.is_ambiguous()
        """
        return False

    @classmethod
    def elucidate(cls):
        """RE.elucidate() -> str

        return a representation of the site with the cut on the (+) strand
        represented as '^' and the cut on the (-) strand as '_'.
        ie:
        >>> EcoRI.elucidate()   # 5' overhang
        'G^AATT_C'
        >>> KpnI.elucidate()    # 3' overhang
        'G_GTAC^C'
        >>> EcoRV.elucidate()   # blunt
        'GAT^_ATC'
        >>> SnaI.elucidate()    # NotDefined, cut profile unknown.
        '? GTATAC ?'
        >>>
        """
        cut5 = cls.fst5
        cut3 = cls.fst3
        site = cls.site
        if cls.cut_twice():
            return 'cut twice, not yet implemented sorry.'
        if cls.is_5overhang():
            if cut5 == cut3 == 0:
                return 'N^' + cls.site + '_N'
            if cut3 == 0:
                return site[:cut5] + '^' + site[cut5:] + '_N'
            return (site[:cut5] + '^' + site[cut5:cut3] + '_' +
                    site[cut3:])
        if cls.is_blunt():
            return site[:cut5] + '^_' + site[cut5:]
        # neither 5' overhang nor blunt : the (-) strand mark comes first.
        if cut5 == cut3 == 0:
            return 'N_' + site + '^N'
        return site[:cut3] + '_' + site[cut3:cut5] + '^' + site[cut5:]

    @classmethod
    def _mod2(cls, other):
        """RE._mod2(other) -> bool.

        for internal use only

        test for the compatibility of restriction ending of RE and other.
        """
        #
        # called by RE._mod1(other) when the one of the enzyme is ambiguous
        #
        if other.ovhgseq == cls.ovhgseq:
            return True
        if issubclass(other, Ambiguous):
            # let the ambiguous enzyme do the comparison.
            return other._mod2(cls)
        return False
1567
class Ambiguous(AbstractCut):
    """Implement the methods specific to the enzymes for which the overhang
    is variable.

    Typical example : BstXI -> CCAN_NNNN^NTGG
                      The overhang can be any sequence of 4 bases.
    Notes:
        Blunt enzymes are always defined, even if their site is
        GGATCCNNN^_N : their overhang is always the same, blunt!

    Internal use only. Not meant to be instantiated.
    """

    @classmethod
    def _drop(cls):
        """RE._drop() -> None, cls.results is updated.

        for internal use only.

        drop the site that are situated outside the sequence in linear
        sequence. modify the index for site in circular sequences.
        """
        length = len(cls.dna)
        drop = itertools.dropwhile
        take = itertools.takewhile
        if cls.dna.is_linear():
            cls.results = [x for x in drop(lambda x: x < 1, cls.results)]
            cls.results = [x for x in take(lambda x: x < length,
                                           cls.results)]
        else:
            # leading positions before the start of the sequence.
            for index, location in enumerate(cls.results):
                if location < 1:
                    cls.results[index] += length
                else:
                    break
            # trailing positions after the end, walked from the last one.
            for index, location in enumerate(cls.results[::-1]):
                if location > length:
                    cls.results[-(index + 1)] -= length
                else:
                    break
        return

    @classmethod
    def is_defined(cls):
        """RE.is_defined() -> bool.

        True if the sequence recognised and cut is constant,
        i.e. the recognition site is not degenerated AND the enzyme cut inside
        the site.

        see also:
            RE.is_ambiguous()
            RE.is_unknown()
        """
        return False

    @classmethod
    def is_ambiguous(cls):
        """RE.is_ambiguous() -> bool.

        True if the sequence recognised and cut is ambiguous,
        i.e. the recognition site is degenerated AND/OR the enzyme cut outside
        the site.

        see also:
            RE.is_defined()
            RE.is_unknown()
        """
        return True

    @classmethod
    def is_unknown(cls):
        """RE.is_unknown() -> bool.

        True if the sequence is unknown,
        i.e. the recognition site has not been characterised yet.

        see also:
            RE.is_defined()
            RE.is_ambiguous()
        """
        return False

    @classmethod
    def _mod2(cls, other):
        """RE._mod2(other) -> bool.

        for internal use only

        test for the compatibility of restriction ending of RE and other.
        The overhang of RE is turned into a regular expression which is
        matched against the overhang of other.
        """
        #
        # called by RE._mod1(other) when the one of the enzyme is ambiguous
        #
        if len(cls.ovhgseq) != len(other.ovhgseq):
            return False
        se = cls.ovhgseq
        # The loop runs over the original overhang, se is rebuilt on the way.
        for base in cls.ovhgseq:
            if base in 'N':
                # N stands for any base.
                se = '.'.join(se.split('N'))
            if base in 'RYWMSKHDBV':
                # other degenerated codes : character class made of the
                # bases listed in the matching table.
                expand = '[' + matching[base] + ']'
                se = expand.join(se.split(base))
        if re.match(se, other.ovhgseq):
            return True
        else:
            return False

    @classmethod
    def elucidate(cls):
        """RE.elucidate() -> str

        return a representation of the site with the cut on the (+) strand
        represented as '^' and the cut on the (-) strand as '_'.
        ie:
        >>> EcoRI.elucidate()   # 5' overhang
        'G^AATT_C'
        >>> KpnI.elucidate()    # 3' overhang
        'G_GTAC^C'
        >>> EcoRV.elucidate()   # blunt
        'GAT^_ATC'
        >>> SnaI.elucidate()    # NotDefined, cut profile unknown.
        '? GTATAC ?'
        >>>

        raise a ValueError for a blunt enzyme whose cut is inside the site.
        """
        f5 = cls.fst5
        f3 = cls.fst3
        length = len(cls)
        site = cls.site
        # The result is named rep and not re : re is the name of the regular
        # expression module used elsewhere in this class.
        if cls.cut_twice():
            rep = 'cut twice, not yet implemented sorry.'
        elif cls.is_5overhang():
            if f3 == f5 == 0:
                rep = 'N^' + site + '_N'
            elif 0 <= f5 <= length and 0 <= f3 + length <= length:
                rep = site[:f5] + '^' + site[f5:f3] + '_' + site[f3:]
            elif 0 <= f5 <= length:
                rep = site[:f5] + '^' + site[f5:] + f3 * 'N' + '_N'
            elif 0 <= f3 + length <= length:
                rep = 'N^' + abs(f5) * 'N' + site[:f3] + '_' + site[f3:]
            elif f3 + length < 0:
                # 'N^' must be concatenated with the run of N, multiplying
                # it by the run (str * int * str) raises a TypeError.
                rep = ('N^' + abs(f5) * 'N' + '_' +
                       abs(length + f3) * 'N' + site)
            elif f5 > length:
                rep = (site + (f5 - length) * 'N' + '^' +
                       (length + f3 - f5) * 'N' + '_N')
            else:
                rep = 'N^' + abs(f5) * 'N' + site + f3 * 'N' + '_N'
        elif cls.is_blunt():
            if f5 < 0:
                rep = 'N^_' + abs(f5) * 'N' + site
            elif f5 > length:
                rep = site + (f5 - length) * 'N' + '^_N'
            else:
                raise ValueError('%s.easyrepr() : error f5=%i'
                                 % (cls.name, f5))
        else:
            if f3 == 0:
                if f5 == 0:
                    rep = 'N_' + site + '^N'
                else:
                    rep = site + '_' + (f5 - length) * 'N' + '^N'
            elif 0 < f3 + length <= length and 0 <= f5 <= length:
                rep = site[:f3] + '_' + site[f3:f5] + '^' + site[f5:]
            elif 0 < f3 + length <= length:
                rep = (site[:f3] + '_' + site[f3:] +
                       (f5 - length) * 'N' + '^N')
            elif 0 <= f5 <= length:
                rep = ('N_' + 'N' * (f3 + length) + site[:f5] + '^' +
                       site[f5:])
            elif f3 > 0:
                rep = (site + f3 * 'N' + '_' +
                       (f5 - f3 - length) * 'N' + '^N')
            elif f5 < 0:
                rep = ('N_' + abs(f3 - f5 + length) * 'N' + '^' +
                       abs(f5) * 'N' + site)
            else:
                rep = ('N_' + abs(f3 + length) * 'N' + site +
                       (f5 - length) * 'N' + '^N')
        return rep
1748
class NotDefined(AbstractCut):
    """Implement the methods specific to the enzymes for which the overhang
    is not characterised.

    Correspond to NoCut and Unknown.

    Internal use only. Not meant to be instantiated.
    """

    @classmethod
    def _drop(cls):
        """RE._drop() -> None, cls.results is updated.

        for internal use only.

        Linear sequence : nothing is done.
        Circular sequence : the index of the sites which fall outside the
        sequence are brought back inside it.
        """
        if cls.dna.is_linear():
            return
        else:
            length = len(cls.dna)
            # leading positions before the start of the sequence.
            for index, location in enumerate(cls.results):
                if location < 1:
                    cls.results[index] += length
                else:
                    break
            # trailing positions after the end of the sequence.
            # The walk must start from the LAST position (reversed copy),
            # -(index + 1) is the index of that position in cls.results.
            for index, location in enumerate(cls.results[::-1]):
                if location > length:
                    cls.results[-(index + 1)] -= length
                else:
                    break
        return

    @classmethod
    def is_defined(cls):
        """RE.is_defined() -> bool.

        True if the sequence recognised and cut is constant,
        i.e. the recognition site is not degenerated AND the enzyme cut inside
        the site.

        see also:
            RE.is_ambiguous()
            RE.is_unknown()
        """
        return False

    @classmethod
    def is_ambiguous(cls):
        """RE.is_ambiguous() -> bool.

        True if the sequence recognised and cut is ambiguous,
        i.e. the recognition site is degenerated AND/OR the enzyme cut outside
        the site.

        see also:
            RE.is_defined()
            RE.is_unknown()
        """
        return False

    @classmethod
    def is_unknown(cls):
        """RE.is_unknown() -> bool.

        True if the sequence is unknown,
        i.e. the recognition site has not been characterised yet.

        see also:
            RE.is_defined()
            RE.is_ambiguous()
        """
        return True

    @classmethod
    def _mod2(cls, other):
        """RE._mod2(other) -> never returns, raise a ValueError.

        for internal use only

        test for the compatibility of restriction ending of RE and other.
        """
        #
        # Normally we should not arrive here. But well better safe than
        # sorry.
        # The overhang is not defined, we are compatible with nobody :
        # raise an error rather than quietly return False.
        #
        raise ValueError("%s.mod2(%s), %s : NotDefined. pas glop pas glop!"
                         % (str(cls), str(other), str(cls)))

    @classmethod
    def elucidate(cls):
        """RE.elucidate() -> str

        return a representation of the site with the cut on the (+) strand
        represented as '^' and the cut on the (-) strand as '_'.
        ie:
        >>> EcoRI.elucidate()   # 5' overhang
        'G^AATT_C'
        >>> KpnI.elucidate()    # 3' overhang
        'G_GTAC^C'
        >>> EcoRV.elucidate()   # blunt
        'GAT^_ATC'
        >>> SnaI.elucidate()    # NotDefined, cut profile unknown.
        '? GTATAC ?'
        >>>
        """
        return '? %s ?' % cls.site
1860
class Commercially_available(AbstractCut):
    #
    # Recent addition to Rebase make this naming convention uncertain.
    # May be better to says enzymes which have a supplier.
    #
    """Methods of the enzymes which have at least one supplier.

    Internal use only. Not meant to be instantiated.
    """

    @classmethod
    def suppliers(cls):
        """RE.suppliers() -> None, print the suppliers of RE.

        One line, ended by a comma, is printed for each supplier code
        found in RE.suppl.
        """
        for code in cls.suppl:
            line = suppliers_dict[code][0] + ','
            print(line)
        return

    @classmethod
    def supplier_list(cls):
        """RE.supplier_list() -> list.

        list of the supplier names for RE, in the order in which the
        suppliers come out of suppliers_dict.
        """
        names = []
        for code, entry in suppliers_dict.items():
            if code in cls.suppl:
                names.append(entry[0])
        return names

    @classmethod
    def buffers(cls, supplier):
        """RE.buffers(supplier) -> None.

        not implemented yet.
        """
        return

    @classmethod
    def is_comm(cls):
        """RE.is_comm() -> bool.

        True if RE has suppliers, which is always the case here.
        """
        return True
1903
class Not_available(AbstractCut):
    """Methods of the enzymes which have no supplier.

    Internal use only. Not meant to be instantiated.
    """

    @staticmethod
    def suppliers():
        """RE.suppliers() -> None.

        There is no supplier to print for these enzymes.
        """
        return None

    @classmethod
    def supplier_list(cls):
        """RE.supplier_list() -> list.

        list of the supplier names for RE : always empty here.
        """
        return []

    @classmethod
    def buffers(cls, supplier):
        """RE.buffers(supplier) -> raise a TypeError.

        not implemented yet, and the enzyme cannot be bought anyway.
        """
        raise TypeError("Enzyme not commercially available.")

    @classmethod
    def is_comm(cls):
        """RE.is_comm() -> bool.

        True if RE has suppliers, which is never the case here.
        """
        return False
1940
###############################################################################
#                                                                             #
#                       Restriction Batch                                     #
#                                                                             #
###############################################################################


class RestrictionBatch(set):
    """Set of restriction enzymes which can be searched in one call.

    Attributes set by __init__:
        mapping         the results of the last search (enzyme -> result).
        already_mapped  (sequence as a string, linear) of the last search,
                        None when nothing has been searched yet.
        suppliers       list of the supplier codes used to build the batch.
    """

    def __init__(self, first=(), suppliers=()):
        """RestrictionBatch([sequence]) -> new RestrictionBatch.

        first is a sequence of enzymes (or of names of enzymes).
        suppliers is a sequence of supplier codes, all the enzymes listed
        for these codes in suppliers_dict are added to the batch.
        raise a ValueError if an element of first is not an enzyme,
        a KeyError if a code is not in suppliers_dict.
        """
        first = [self.format(x) for x in first]
        first += [eval(x) for n in suppliers for x in suppliers_dict[n][1]]
        set.__init__(self, first)
        self.mapping = dict.fromkeys(self)
        self.already_mapped = None
        # add_supplier() and current_suppliers() need this attribute.
        self.suppliers = list(suppliers)

    def __str__(self):
        if len(self) < 5:
            return '+'.join(self.elements())
        else:
            # long batch : only the two first and the two last names.
            return '...'.join(('+'.join(self.elements()[:2]),
                               '+'.join(self.elements()[-2:])))

    def __repr__(self):
        return 'RestrictionBatch(%s)' % self.elements()

    def __contains__(self, other):
        try:
            other = self.format(other)
        except ValueError:  # other is not a restriction enzyme
            return False
        return set.__contains__(self, other)

    def __div__(self, other):
        """b / dna -> B.search(dna)."""
        return self.search(other)

    def __rdiv__(self, other):
        """dna / b -> B.search(dna)."""
        return self.search(other)

    # Python 3 spelling of the division operators (__div__ is Python 2 only).
    __truediv__ = __div__
    __rtruediv__ = __rdiv__

    def get(self, enzyme, add=False):
        """B.get(enzyme[, add]) -> enzyme class.

        if add is True and enzyme is not in B add enzyme to B.
        if add is False (which is the default) only return enzyme.
        if enzyme is not a RestrictionType or can not be evaluated to
        a RestrictionType, raise a ValueError.
        raise a ValueError too if enzyme is not in B and add is False.
        """
        e = self.format(enzyme)
        if e in self:
            return e
        elif add:
            self.add(e)
            return e
        else:
            raise ValueError('enzyme %s is not in RestrictionBatch'
                             % e.__name__)

    def lambdasplit(self, func):
        """B.lambdasplit(func) -> RestrictionBatch .

        the new batch will contains only the enzymes for which
        func return True.
        """
        new = RestrictionBatch()
        # The enzymes must be added to the set itself : storing them in a
        # _data attribute (what the old sets.Set needed) leaves it empty.
        for enzyme in filter(func, self):
            new.add_nocheck(enzyme)
        return new

    def add_supplier(self, letter):
        """B.add_supplier(letter) -> add a new set of enzyme to B.

        letter represents the suppliers as defined in the dictionary
        RestrictionDictionary.suppliers
        return None.
        raise a KeyError if letter is not a supplier code.
        """
        supplier = suppliers_dict[letter]
        if not hasattr(self, 'suppliers'):
            # instance which has not been through __init__.
            self.suppliers = []
        self.suppliers.append(letter)
        for x in supplier[1]:
            self.add_nocheck(eval(x))
        return

    def current_suppliers(self):
        """B.current_suppliers() -> list.

        return a sorted list of the suppliers which have been used to
        create the batch.
        """
        codes = getattr(self, 'suppliers', ())
        suppl_list = sorted(suppliers_dict[x][0] for x in codes)
        return suppl_list

    def __iadd__(self, other):
        """ b += other -> add other to b, check the type of other."""
        self.add(other)
        return self

    def __add__(self, other):
        """ b + other -> new RestrictionBatch."""
        new = self.__class__(self)
        new.add(other)
        return new

    def remove(self, other):
        """B.remove(other) -> remove other from B if other is a
        RestrictionType.

        Safe set.remove method. Verify that other is a RestrictionType or can
        be evaluated to a RestrictionType.
        raise a ValueError if other can not be evaluated to a RestrictionType.
        raise a KeyError if other is not in B.
        """
        return set.remove(self, self.format(other))

    def add(self, other):
        """B.add(other) -> add other to B if other is a RestrictionType.

        Safe set.add method. Verify that other is a RestrictionType or can be
        evaluated to a RestrictionType.
        raise a ValueError if other can not be evaluated to a RestrictionType.
        """
        return set.add(self, self.format(other))

    def add_nocheck(self, other):
        """B.add_nocheck(other) -> add other to B. don't check type of other.
        """
        return set.add(self, other)

    def format(self, y):
        """B.format(y) -> RestrictionType or raise ValueError.

        if y is a RestrictionType return y
        if y can be evaluated to a RestrictionType return eval(y)
        raise a Value Error in all other case.
        """
        try:
            if isinstance(y, RestrictionType):
                return y
            # NOTE(security): eval() runs whatever expression str(y) holds,
            # never pass text coming from an untrusted source.
            # The text is evaluated once and the result reused.
            candidate = eval(str(y))
            if isinstance(candidate, RestrictionType):
                return candidate
        except (NameError, SyntaxError):
            pass
        raise ValueError('%s is not a RestrictionType' % y.__class__)

    def is_restriction(self, y):
        """B.is_restriction(y) -> bool.

        True is y or eval(y) is a RestrictionType.
        The errors raised by eval(str(y)) are not caught.
        """
        return (isinstance(y, RestrictionType) or
                isinstance(eval(str(y)), RestrictionType))

    def split(self, *classes, **flags):
        """B.split(class, [class.__name__ = True]) -> new RestrictionBatch.

        Keep the enzymes which are a subclass of every class given, or,
        for a class given together with the keyword class.__name__ = False,
        the enzymes which are NOT a subclass of it.

        it works but it is slow, so it has really an interest when splitting
        over multiple conditions.
        """
        def splittest(element):
            for klass in classes:
                wanted = flags.get(klass.__name__, True)
                # element is kept only if "is a subclass of klass" agrees
                # with what has been asked for klass.
                if issubclass(element, klass):
                    if not wanted:
                        return False
                elif wanted:
                    return False
            return True

        new = RestrictionBatch()
        # Fill the set itself, a _data attribute would leave it empty.
        for enzyme in filter(splittest, self):
            new.add_nocheck(enzyme)
        return new

    def elements(self):
        """B.elements() -> list.

        give all the names of the enzymes in B sorted alphabetically.
        """
        return sorted(str(e) for e in self)

    def as_string(self):
        """B.as_string() -> list.

        return a list of the name of the elements of B.
        """
        return [str(e) for e in self]

    @classmethod
    def suppl_codes(cls):
        """B.suppl_codes() -> dict

        letter code for the suppliers
        """
        supply = dict((k, v[0]) for k, v in suppliers_dict.items())
        return supply

    @classmethod
    def show_codes(cls):
        """B.show_codes() -> letter codes for the suppliers"""
        supply = [' = '.join(i) for i in cls.suppl_codes().items()]
        print('\n'.join(supply))
        return

    def search(self, dna, linear=True):
        """B.search(dna) -> dict.

        Search dna with every enzyme of B, return the dictionary
        enzyme -> result of enzyme.search() (also stored in B.mapping).
        dna is a DNA or a FormattedSeq instance; for a FormattedSeq the
        linear argument is ignored and dna.linear is used.
        The previous results are returned when the sequence and its
        topology are the same as for the previous search.
        raise a TypeError for any other type of dna.
        """
        #
        # here we replace the search method of the individual enzymes
        # with one unique testing method.
        #
        if not hasattr(self, "already_mapped"):
            # TODO - Why does this happen!
            # Try the "doctest" at the start of PrintFormat.py
            self.already_mapped = None
        if isinstance(dna, DNA):
            # For the searching, we just care about the sequence as a string,
            # if that is the same we can use the cached search results.
            # At the time of writing, Seq == method isn't implemented,
            # and therefore does object identity which is stricter.
            if (str(dna), linear) == self.already_mapped:
                return self.mapping
            else:
                self.already_mapped = str(dna), linear
                fseq = FormattedSeq(dna, linear)
                self.mapping = dict((x, x.search(fseq)) for x in self)
                return self.mapping
        elif isinstance(dna, FormattedSeq):
            if (str(dna), dna.linear) == self.already_mapped:
                return self.mapping
            else:
                self.already_mapped = str(dna), dna.linear
                self.mapping = dict((x, x.search(dna)) for x in self)
                return self.mapping
        raise TypeError("Expected Seq or MutableSeq instance, got %s instead"
                        % type(dna))
2182
2183 ############################################################################### 2184 # # 2185 # Restriction Analysis # 2186 # # 2187 ############################################################################### 2188 2189 2190 -class Analysis(RestrictionBatch, PrintFormat):
2191
def __init__(self, restrictionbatch=RestrictionBatch(), sequence=DNA(''),
             linear=True):
    """Analysis([restrictionbatch [, sequence] linear=True]) -> New Analysis class.

    The enzymes of restrictionbatch become the enzymes of the analysis and
    the batch itself is kept in self.rb.
    When sequence is not empty it is searched straight away, so the
    results are available as soon as the instance is built.

    For most of the method of this class if a dictionary is given it will
    be used as the base to calculate the results.
    If no dictionary is given a new analysis using the Restriction Batch
    which has been given when the Analysis class has been instantiated.
    """
    RestrictionBatch.__init__(self, restrictionbatch)
    self.rb, self.sequence, self.linear = restrictionbatch, sequence, linear
    if sequence:
        self.search(sequence, linear)
2206
def __repr__(self):
    """repr(A) -> 'Analysis(batch,sequence,linear)'.

    batch and sequence are shown with their own repr().
    """
    return 'Analysis(%r,%r,%s)' % (self.rb, self.sequence, self.linear)
2210
def _sub_set(self, wanted):
    """A._sub_set(other_set) -> dict.

    Internal use only.

    Filter self.mapping : the new dictionary holds only the results of
    the enzymes which are in wanted.
    """
    kept = {}
    for enzyme, result in self.mapping.items():
        if enzyme in wanted:
            kept[enzyme] = result
    return kept
2220
def _boundaries(self, start, end):
    """A._boundaries(start, end) -> tuple.

    Format the boundaries for use with the methods that limit the
    search to only part of the sequence given to analyse.

    A boundary smaller than 1 is counted from the end of the sequence
    (the length of self.sequence is added to it), then the boundaries are
    put in increasing order.
    The tuple is (start, end, self._test_normal).
    Nothing (None) is returned when both boundaries end up equal.
    raise a TypeError if start or end is not an int.
    """
    for bound in (start, end):
        if not isinstance(bound, int):
            raise TypeError('expected int, got %s instead' % type(bound))
    if start < 1:  # Looks like this tries to do python list like indexing
        start = start + len(self.sequence)
    if end < 1:
        end = end + len(self.sequence)
    if not start < end:
        end, start = start, end
    if start < end:
        return start, end, self._test_normal
2241
def _test_normal(self, start, end, site):
    """A._test_normal(start, end, site) -> bool.

    Internal use only
    True if site is in between start and end : start is part of the
    interval, end is not.
    """
    return start <= site and site < end
2249
def _test_reverse(self, start, end, site):
    """A._test_reverse(start, end, site) -> bool.

    Internal use only
    Test if site is in between end and start (for circular sequences) :
    either from start up to the last position of self.sequence, or from
    position 1 up to, but not including, end.
    """
    if start <= site <= len(self.sequence):
        return True
    return 1 <= site < end
2257
def format_output(self, dct=None, title='', s1=''):
    """A.format_output([dct[, title[, s1]]]) -> dct.

    Hand the results over to PrintFormat.format_output and return
    whatever it produces. When dct is missing or empty the full
    mapping of the analysis is formatted instead.
    """
    results = dct if dct else self.mapping
    return PrintFormat.format_output(self, results, title, s1)
2266
def print_that(self, dct=None, title='', s1=''):
    """A.print_that([dct[, title[, s1]]]) -> print the results from dct.

    Thin wrapper kept for backwards compatibility: it prints what
    A.format_output() returns for the same arguments.
    If dct is not given the full dictionary is used.
    """
    text = self.format_output(dct, title, s1)
    print(text)
2276
def change(self, **what):
    """A.change(**attribute_name) -> Change attribute of Analysis.

    Recognised keywords:

        - NameWidth, ConsoleWidth: set the attribute, then recompute
          Cmodulo and PrefWidth from the two widths.
        - sequence: store the new sequence and run the search again.
        - rb: re-initialise the analysis with the new restriction batch,
          keeping the current sequence and linear flag.
        - linear: store the new flag and run the search again.
        - Indent, Maxsize: set the attribute.

    Cmodulo and PrefWidth are derived values and cannot be set directly;
    asking for them, or for any unknown name, raises AttributeError.

    It is possible to change the width of the shell by setting
    self.ConsoleWidth to what you want.
    self.NameWidth refer to the maximal length of the enzyme name.

    Changing one of these parameters here might not give the results
    you expect. In which case, you can settle back to a 80 columns shell
    or try to change self.Cmodulo and self.PrefWidth in PrintFormat until
    you get it right.
    """
    for k, v in what.items():
        if k in ('NameWidth', 'ConsoleWidth'):
            setattr(self, k, v)
            self.Cmodulo = self.ConsoleWidth % self.NameWidth
            self.PrefWidth = self.ConsoleWidth - self.Cmodulo
        # Keyword names are compared with ==: identity (is) only works by
        # accident of string interning.
        elif k == 'sequence':
            self.sequence = v
            self.search(self.sequence, self.linear)
        elif k == 'rb':
            # __init__ returns None, so its result must not be bound to
            # self or the remaining keywords would be applied to None.
            Analysis.__init__(self, v, self.sequence, self.linear)
        elif k == 'linear':
            self.linear = v
            self.search(self.sequence, v)
        elif k in ('Indent', 'Maxsize'):
            setattr(self, k, v)
        elif k in ('Cmodulo', 'PrefWidth'):
            raise AttributeError(
                'To change %s, change NameWidth and/or ConsoleWidth' % k)
        else:
            raise AttributeError('Analysis has no attribute %s' % k)
    return
2310
def full(self, linear=True):
    """A.full() -> dict.

    Give back the complete restriction map stored for the sequence.
    The linear argument is accepted but not used.
    """
    whole_map = self.mapping
    return whole_map
2317
def blunt(self, dct=None):
    """A.blunt([dct]) -> dict.

    Only the enzymes for which is_blunt() is true, i.e. the enzymes
    which produce blunt ends.

    dct is the result dictionary to filter. When it is not given (None)
    the full mapping of the analysis is used. An empty dct is filtered
    as it is and therefore gives an empty dict, so that the result of
    another filter can be passed in safely.
    """
    if dct is None:
        dct = self.mapping
    return {k: v for k, v in dct.items() if k.is_blunt()}
2326
def overhang5(self, dct=None):
    """A.overhang5([dct]) -> dict.

    Only the enzymes which have a 5' overhang restriction site.

    dct is the result dictionary to filter. When it is not given (None)
    the full mapping of the analysis is used. An empty dct is filtered
    as it is and therefore gives an empty dict.
    """
    if dct is None:
        dct = self.mapping
    return {k: v for k, v in dct.items() if k.is_5overhang()}
2335
def overhang3(self, dct=None):
    """A.overhang3([dct]) -> dict.

    Only the enzymes which have a 3' overhang restriction site.

    dct is the result dictionary to filter. When it is not given (None)
    the full mapping of the analysis is used. An empty dct is filtered
    as it is and therefore gives an empty dict.
    """
    if dct is None:
        dct = self.mapping
    return {k: v for k, v in dct.items() if k.is_3overhang()}
2344
def defined(self, dct=None):
    """A.defined([dct]) -> dict.

    Only the enzymes that have a defined restriction site in Rebase.

    dct is the result dictionary to filter. When it is not given (None)
    the full mapping of the analysis is used. An empty dct is filtered
    as it is and therefore gives an empty dict.
    """
    if dct is None:
        dct = self.mapping
    return {k: v for k, v in dct.items() if k.is_defined()}
2353
def with_sites(self, dct=None):
    """A.with_sites([dct]) -> dict.

    Enzymes which have at least one site in the sequence.

    dct is the result dictionary to filter. When it is not given (None)
    the full mapping of the analysis is used. An empty dct is filtered
    as it is and therefore gives an empty dict.
    """
    if dct is None:
        dct = self.mapping
    return {k: v for k, v in dct.items() if v}
2362
def without_site(self, dct=None):
    """A.without_site([dct]) -> dict.

    Enzymes which have no site in the sequence.

    dct is the result dictionary to filter. When it is not given (None)
    the full mapping of the analysis is used. An empty dct is filtered
    as it is and therefore gives an empty dict.
    """
    if dct is None:
        dct = self.mapping
    return {k: v for k, v in dct.items() if not v}
2371
def with_N_sites(self, N, dct=None):
    """A.with_N_sites(N [, dct]) -> dict.

    Enzymes which cut N times the sequence.

    dct is the result dictionary to filter. When it is not given (None)
    the full mapping of the analysis is used. An empty dct is filtered
    as it is and therefore gives an empty dict.
    """
    if dct is None:
        dct = self.mapping
    return {k: v for k, v in dct.items() if len(v) == N}
2380
def with_number_list(self, list, dct=None):
    """A.with_number_list(list [, dct]) -> dict.

    Enzymes whose number of sites in the sequence is one of the values
    contained in list.

    dct is the result dictionary to filter. When it is not given (None)
    the full mapping of the analysis is used. An empty dct is filtered
    as it is and therefore gives an empty dict.
    """
    # The parameter name shadows the builtin list; it is kept because
    # callers may pass it by keyword.
    if dct is None:
        dct = self.mapping
    return {k: v for k, v in dct.items() if len(v) in list}
2385
def with_name(self, names, dct=None):
    """A.with_name(list_of_names [, dct]) -> dict.

    Limit the search to the enzymes named in list_of_names.

    Every name which is not in AllEnzymes triggers a BiopythonWarning
    and is ignored. The names argument itself is left untouched.

    When dct is not given (None) a new search of the sequence is made
    with the known names only. Otherwise the entries of dct whose key is
    one of the known names are returned; an empty dct gives an empty
    dict.
    """
    # Build a new list instead of deleting from names while iterating
    # over it: deleting in place skips the element following each unknown
    # name and modifies the list of the caller.
    known = []
    for enzyme in names:
        if enzyme in AllEnzymes:
            known.append(enzyme)
        else:
            warnings.warn("no data for the enzyme: %s" % enzyme,
                          BiopythonWarning)
    if dct is None:
        return RestrictionBatch(known).search(self.sequence, self.linear)
    return {n: dct[n] for n in known if n in dct}
2399
def with_site_size(self, site_size, dct=None):
    """A.with_site_size(site_size [, dct]) -> dict.

    Limit the search to the enzymes whose site is of size <site_size>.

    The candidate enzymes are the members of this analysis whose size
    attribute equals site_size. When dct is not given (None) a new search
    of the sequence is made with these enzymes, using the linear flag of
    the analysis. Otherwise the entries of dct whose key is one of these
    enzymes are returned; an empty dct gives an empty dict.
    """
    sites = [name for name in self if name.size == site_size]
    if dct is None:
        return RestrictionBatch(sites).search(self.sequence, self.linear)
    # Membership is tested against the selected enzymes; testing against
    # site_size (a number) raised TypeError.
    return {k: v for k, v in dct.items() if k in sites}
2409
def only_between(self, start, end, dct=None):
    """A.only_between(start, end[, dct]) -> dict.

    Enzymes that cut the sequence only in between start and end.

    An enzyme is kept when it has at least one site and all of its sites
    pass the test given by A._boundaries(start, end).

    dct is the result dictionary to filter. When it is not given (None)
    the full mapping of the analysis is used. An empty dct is filtered
    as it is and therefore gives an empty dict.
    """
    start, end, test = self._boundaries(start, end)
    if dct is None:
        dct = self.mapping
    return {key: sites for key, sites in dct.items()
            if sites and all(test(start, end, site) for site in sites)}
2430
def between(self, start, end, dct=None):
    """A.between(start, end [, dct]) -> dict.

    Enzymes that cut the sequence at least in between start and end.
    They may cut outside as well.

    An enzyme is kept, with all of its sites, as soon as one of them
    passes the test given by A._boundaries(start, end).

    dct is the result dictionary to filter. When it is not given (None)
    the full mapping of the analysis is used. An empty dct is filtered
    as it is and therefore gives an empty dict.
    """
    start, end, test = self._boundaries(start, end)
    if dct is None:
        dct = self.mapping
    return {key: sites for key, sites in dct.items()
            if any(test(start, end, site) for site in sites)}
2448
def show_only_between(self, start, end, dct=None):
    """A.show_only_between(start, end [, dct]) -> dict.

    Take the enzymes returned by A.between(start, end, dct) and, for
    each of them, list only the sites located in the region.

    When start <= end a site is listed if start <= site <= end.
    Otherwise a site is listed if it is at or after start, or at or
    before end.
    """
    if start <= end:
        def keep(pos):
            return start <= pos <= end
    else:
        def keep(pos):
            return start <= pos or pos <= end
    found = self.between(start, end, dct)
    return {enzyme: [pos for pos in sites if keep(pos)]
            for enzyme, sites in found.items()}
2463
def only_outside(self, start, end, dct=None):
    """A.only_outside(start, end [, dct]) -> dict.

    Enzymes that cut the sequence outside of the region
    in between start and end but do not cut inside.

    An enzyme is kept when it has at least one site and none of its
    sites passes the test given by A._boundaries(start, end).

    dct is the result dictionary to filter. When it is not given (None)
    the full mapping of the analysis is used. An empty dct is filtered
    as it is and therefore gives an empty dict.
    """
    start, end, test = self._boundaries(start, end)
    if dct is None:
        dct = self.mapping
    return {key: sites for key, sites in dct.items()
            if sites and not any(test(start, end, site) for site in sites)}
2485
def outside(self, start, end, dct=None):
    """A.outside(start, end [, dct]) -> dict.

    Enzymes that cut outside the region in between start and end.
    No test is made to know if they cut or not inside this region.

    An enzyme is kept, with all of its sites, as soon as one of them
    fails the test given by A._boundaries(start, end).

    dct is the result dictionary to filter. When it is not given (None)
    the full mapping of the analysis is used. An empty dct is filtered
    as it is and therefore gives an empty dict.
    """
    start, end, test = self._boundaries(start, end)
    if dct is None:
        dct = self.mapping
    return {key: sites for key, sites in dct.items()
            if any(not test(start, end, site) for site in sites)}
2504
def do_not_cut(self, start, end, dct=None):
    """A.do_not_cut(start, end [, dct]) -> dict.

    Enzymes that do not cut the region in between start and end.

    The result is made of the enzymes of dct which have no site at all
    and of the enzymes of dct which cut only outside of the region.

    dct is the result dictionary to filter. When it is not given (None)
    the full mapping of the analysis is used. When there is nothing to
    filter an empty dict is returned.
    """
    if dct is None:
        dct = self.mapping
    if not dct:
        return {}
    # Both filters work on the same dct, so that enzymes which are not
    # in dct cannot enter the result.
    d = self.without_site(dct)
    d.update(self.only_outside(start, end, dct))
    return d
#
# The restriction enzyme classes are created dynamically when the module is
# imported. Here is the magic which allows the creation of the
# restriction-enzyme classes.
#
# The reason for the two dictionaries in Restriction_Dictionary,
# one for the types (which will be called pseudo-types as they really
# correspond to the values that instances of RestrictionType can take)
# and one for the enzymes, is efficiency, as the bases are evaluated
# once per pseudo-type.
#
# However Restriction is still a very inefficient module at import. But
# remember that around 660 classes (which is more or less the size of Rebase)
# have to be created dynamically. However, this processing takes place only
# once.
# This inefficiency is however largely compensated by the use of metaclasses
# which provide a very efficient layout for the classes themselves, mostly
# alleviating the need for if/else branches in the class methods.
#
# It is essential to run Restriction with doc string optimisation (-OO
# switch) as the doc strings of 660 classes take a lot of processing.
#
CommOnly = RestrictionBatch()    # commercial enzymes
NonComm = RestrictionBatch()     # not available commercially
for TYPE, (bases, enzymes) in typedict.items():
    #
    # The keys are the pseudo-types TYPE (stored as type1, type2...)
    # The names are not important and are only present to differentiate
    # the keys in the dict. All the pseudo-types are in fact RestrictionType.
    # These names will not be used afterwards and the pseudo-types are not
    # kept in the locals() dictionary. It is therefore impossible to
    # import them.
    # Now, if you have a look at the dictionary, you will see that not all
    # the types are present, as those without corresponding enzymes have
    # been removed by Dictionary_Builder().
    #
    # The values are tuples which contain
    # as first element a tuple of bases (as strings) and
    # as second element the names of the enzymes.
    #
    # First eval the bases: each string is turned into the object of that
    # name visible from this module.
    # NOTE(review): eval() runs on the strings stored in typedict;
    # presumably these only ever come from the bundled
    # Restriction_Dictionary - confirm before feeding it anything else.
    #
    bases = tuple(eval(x) for x in bases)
    #
    # now create the particular value of RestrictionType for the classes
    # in enzymes.
    #
    T = type.__new__(RestrictionType, 'RestrictionType', bases, {})
    for k in enzymes:
        #
        # Now, we go through all the enzymes and assign them their type.
        # enzymedict[k] contains the values of the attributes for this
        # particular class (self.site, self.ovhg,....).
        #
        newenz = T(k, bases, enzymedict[k])
        #
        # we add the enzymes to the corresponding batch.
        #
        # No need to verify the enzyme is a RestrictionType -> add_nocheck
        #
        if newenz.is_comm():
            CommOnly.add_nocheck(newenz)
        else:
            NonComm.add_nocheck(newenz)
#
# AllEnzymes is a RestrictionBatch with all the enzymes from Rebase.
#
AllEnzymes = CommOnly | NonComm
#
# Now, place the enzymes in locals so they can be imported.
#
names = [str(x) for x in AllEnzymes]
try:
    del x  # noqa
except NameError:
    # Scoping changed in Python 3, the variable isn't leaked
    pass
locals().update(dict(zip(names, AllEnzymes)))
# The public names are the fixed entries below plus one entry per enzyme.
__all__ = ('FormattedSeq', 'Analysis', 'RestrictionBatch', 'AllEnzymes',
           'CommOnly', 'NonComm') + tuple(names)
# Remove the temporary names used by the loop above from the module.
del k, enzymes, TYPE, bases, names